library(SpatialExperiment)
## Loading required package: SingleCellExperiment
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Warning: package 'GenomeInfoDb' was built under R version 4.3.3
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
library(spatialLIBD)
library(ggplot2)
spe_weighted <- readRDS("reps_high_range_sigma.sq_high_beta_1000_genes_SVG_percentages_subset/50_percent_nonSVGs/spe_weighted_nnSVG_3.rds")
spe_unweighted <- readRDS("reps_high_range_sigma.sq_high_beta_1000_genes_SVG_percentages_subset/50_percent_nonSVGs/spe_nnSVG_3.rds")
adj_p_values_weighted <- p.adjust(1 - pchisq(rowData(spe_weighted)$weighted_LR_stat, df=2), method = "BH")
rowData(spe_weighted)$weighted_padj <- adj_p_values_weighted
# guiding question: understanding the difference between the weighted and unweighted simulations. why is the weighted simulation more conservative and lower TNR? why does the unweighted simulation capture the SVGs perfectly?
# make a 2x2 table of SVGs with high and low mean and high and low sigma.sq
# find mean of ground_truth_sigma.sq for nonzero values
nonzero_mean <- mean(rowData(spe_unweighted)$ground_truth_sigma.sq[rowData(spe_unweighted)$ground_truth_sigma.sq > 0], na.rm = TRUE)
svg_high_mean_high_sigma_sq <- sum(rowData(spe_unweighted)$ground_truth_sigma.sq > 0 & rowData(spe_unweighted)$ground_truth_sigma.sq > nonzero_mean & rowData(spe_unweighted)$mean > 1.5, na.rm = TRUE)
svg_high_mean_low_sigma_sq <- sum(rowData(spe_unweighted)$ground_truth_sigma.sq > 0 & rowData(spe_unweighted)$ground_truth_sigma.sq < nonzero_mean & rowData(spe_unweighted)$mean > 1.5, na.rm = TRUE)
svg_low_mean_high_sigma_sq <- sum(rowData(spe_unweighted)$ground_truth_sigma.sq > 0 & rowData(spe_unweighted)$ground_truth_sigma.sq > nonzero_mean & rowData(spe_unweighted)$mean < 1.5, na.rm = TRUE)
svg_low_mean_low_sigma_sq <- sum(rowData(spe_unweighted)$ground_truth_sigma.sq > 0 & rowData(spe_unweighted)$ground_truth_sigma.sq < nonzero_mean & rowData(spe_unweighted)$mean < 1.5, na.rm = TRUE)
svg_table <- matrix(c(svg_high_mean_high_sigma_sq, svg_high_mean_low_sigma_sq, svg_low_mean_high_sigma_sq, svg_low_mean_low_sigma_sq), nrow = 2, byrow = TRUE)
svg_table
## [,1] [,2]
## [1,] 164 128
## [2,] 97 111
# are the weighted and unweighted simulations capturing the same SVGs?
tp_unweighted <- which(rowData(spe_unweighted)$ground_truth_sigma.sq != 0 & rowData(spe_unweighted)$padj <= 0.05)
tp_weighted <- which(rowData(spe_weighted)$ground_truth_sigma.sq != 0 & rowData(spe_weighted)$weighted_padj <= 0.05)
# are the indices the same? check which indices in tp_unweighted are not in tp_weighted
length(tp_unweighted)
## [1] 498
length(tp_weighted)
## [1] 499
length(intersect(tp_unweighted, tp_weighted)) # one more TP in weighted
## [1] 498
# can we investigate the true positives that both are missing?
# are the weighted and unweighted simulations capturing the same non-SVGs?
tn_unweighted <- rowData(spe_unweighted)$ground_truth_sigma.sq == 0 & rowData(spe_unweighted)$padj > 0.05
tn_weighted <- rowData(spe_weighted)$ground_truth_sigma.sq == 0 & rowData(spe_weighted)$weighted_padj > 0.05
sum(tn_unweighted)
## [1] 496
sum(tn_weighted)
## [1] 446
length(intersect(which(tn_unweighted), which(tn_weighted)))
## [1] 444
# dig more into the differences in true negatives between the weighted and unweighted simulations
# find the gene indices of the true negatives that are not getting captured by the weighted simulation
tn_unweighted_not_weighted <- which(tn_unweighted & !tn_weighted)
length(tn_unweighted_not_weighted) # = 52
## [1] 52
# find the gene indices of the true negatives that are not getting captured by the unweighted simulation
tn_weighted_not_unweighted <- which(tn_weighted & !tn_unweighted)
length(tn_weighted_not_unweighted) # = 2
## [1] 2
# find the gene indices of the true negatives that are getting captured by both simulations
tn_both <- which(tn_weighted & tn_unweighted)
length(tn_both) # = 444
## [1] 444
# find the dist of the mean of the true negatives that are not getting captured by the weighted simulation
fivenum(rowData(spe_unweighted)$mean[tn_unweighted_not_weighted], na.rm = TRUE)
## [1] 0.5107987 0.7620930 1.2705551 2.0632300 2.7971909
# find the dist of the mean of the true negatives that are not getting captured by the unweighted simulation
fivenum(rowData(spe_unweighted)$mean[tn_weighted_not_unweighted], na.rm = TRUE)
## [1] 2.571575 2.571575 2.651517 2.731459 2.731459
# find the dist of the mean of the true negatives that are getting captured by both simulations
fivenum(rowData(spe_unweighted)$mean[tn_both], na.rm = TRUE)
## [1] 0.4461620 0.8303745 1.4035647 2.0767836 2.9228466
# find the distribution of mean weights for the true negatives that are not getting captured by the weighted simulation
# weights is spots x genes
weights <- readRDS("reps_high_range_sigma.sq_high_beta_1000_genes_SVG_percentages_subset/50_percent_nonSVGs/weights_3.rds")
fivenum(weights[,tn_unweighted_not_weighted])
## [1] 1.354652 1.635029 2.178387 3.226842 7.615690
fivenum(weights[,tn_weighted_not_unweighted])
## [1] 3.605145 4.422420 4.741563 5.102504 7.615690
fivenum(weights[,tn_both])
## [1] 1.354652 1.662119 2.243093 3.282970 7.615690
fivenum(colMeans(weights[,tn_unweighted_not_weighted]))
## [1] 1.416664 1.730319 2.192067 3.321970 5.539752
colMeans(weights[,tn_weighted_not_unweighted]) # there are only 2 genes
## [1] 5.056290 4.553305
fivenum(colMeans(weights[,tn_both]))
## [1] 1.387083 1.674768 2.262242 3.305554 5.902115
# weights are higher for the true negatives that are not getting captured by either simulation
# create some spot plots to visualize the differences
for (ix in tn_weighted_not_unweighted) {
df <- as.data.frame(cbind(spatialCoords(spe_unweighted), expr = logcounts(spe_unweighted)[ix, ]))
print(ggplot(df, aes(x = pxl_col_in_fullres, y = pxl_row_in_fullres,
color = expr)) +
geom_point(size = 2) +
coord_fixed() +
scale_y_reverse() +
scale_color_viridis_c(name = "logcounts") +
ggtitle("") +
theme_bw() +
theme(plot.title = element_text(face = "italic"),
panel.grid = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank())
)
}


# create some spot plots to visualize the differences
for (ix in tn_unweighted_not_weighted) {
df <- as.data.frame(cbind(spatialCoords(spe_unweighted), expr = logcounts(spe_unweighted)[ix, ]))
print(ggplot(df, aes(x = pxl_col_in_fullres, y = pxl_row_in_fullres,
color = expr)) +
geom_point(size = 2) +
coord_fixed() +
scale_y_reverse() +
scale_color_viridis_c(name = "logcounts") +
ggtitle("") +
theme_bw() +
theme(plot.title = element_text(face = "italic"),
panel.grid = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank())
)
}



















